####################################################
####################################################
#FALSE POSITIVE AND FALSE NEGATIVE RATES
####################################################
####################################################
# Load the analysis file and keep self-reported race as a factor
# (it is the `truth` column of the F1 computation below).
test2 <- read.csv(file = "data/fl_ready_for_analysis.csv")
test2$sr.race.factor <- as.factor(test2$sr.race)

# Keep only rows whose predicted.race is not the empty string.
test3 <- test2[test2$predicted.race != "", ]

# Collapse the party-model indicator columns into a single label column.
# The map is applied in order, so when several indicators equal 1 the last
# entry wins; rows with no indicator equal to 1 keep the "" placeholder.
party.flag.labels <- c(
  predicted.white.party  = "White",
  predicted.black.party  = "Black",
  predicted.latino.party = "Hispanic",
  predicted.asian.party  = "Asian",
  predicted.other.party  = "Other"
)
test3$predicted.race.party <- ""
for (flag.col in names(party.flag.labels)) {
  test3$predicted.race.party[test3[[flag.col]] == 1] <- party.flag.labels[[flag.col]]
}
test3$pr.race.factor <- as.factor(test3$predicted.race.party)

#TABLE 1 - F1-Score
# f_meas() is provided by yardstick; truth is the self-reported race factor,
# estimate is the party-model label factor on test3.
library(yardstick)
f_meas(data = test3, truth = sr.race.factor, estimate = pr.race.factor)

#TABLE 1 - proportions of races across state
# Count of each sr.race value as a percentage of all rows in test2.
100 * (table(test2$sr.race) / nrow(test2))

# Helpers shared by the Table 1 / Table A.1 rates below.
#
# miss.rate(wrong, race, truth)
#   wrong : vector of error indicators (presumably 0/1, 1 = wrong prediction)
#   race  : optional self-reported race label; when given, only rows with
#           truth == race are used (the per-group "false negative" rate)
#   truth : self-reported race, defaults to test2$sr.race
#   Returns the mean of `wrong` (NAs skipped), rounded to 3 decimals.
#
# false.pos.rate(predicted, race, truth)
#   predicted : indicator vector for "predicted as `race`"
#   Returns the mean of `predicted` over rows whose self-reported race is
#   neither `race` nor "Unknown" (NAs skipped), rounded to 3 decimals.
#
# The commented numbers after each call are the values recorded in the
# original script.
miss.rate <- function(wrong, race = NULL, truth = test2$sr.race) {
  if (!is.null(race)) {
    wrong <- wrong[truth == race]
  }
  round(mean(wrong, na.rm = TRUE), 3)
}

false.pos.rate <- function(predicted, race, truth = test2$sr.race) {
  round(mean(predicted[truth != race & truth != "Unknown"], na.rm = TRUE), 3)
}

#TABLE 1 - Model with tract, surname, and party
#overall error rate
miss.rate(test2$wrong.race.party)
#0.151

#false negative
miss.rate(test2$wrong.race.party, "White")
#0.067
miss.rate(test2$wrong.race.party, "Black")
#0.335
miss.rate(test2$wrong.race.party, "Hispanic")
#0.151
miss.rate(test2$wrong.race.party, "Asian")
#0.475
miss.rate(test2$wrong.race.party, "Other")
#0.997

#false positive
false.pos.rate(test2$predicted.white.party, "White")
#0.247
false.pos.rate(test2$predicted.black.party, "Black")
#0.029
false.pos.rate(test2$predicted.latino.party, "Hispanic")
#0.037
false.pos.rate(test2$predicted.asian.party, "Asian")
#0.008
false.pos.rate(test2$predicted.other.party, "Other")
#00000



#TABLE A.1 - model with surname and census tract
#overall error rate
miss.rate(test2$wrong.race)
#0.163
#false negative - i.e.  classifying a latino as non-latino
miss.rate(test2$wrong.race, "White")
#0.065
miss.rate(test2$wrong.race, "Black")
#0.435
miss.rate(test2$wrong.race, "Hispanic")
#0.146
miss.rate(test2$wrong.race, "Asian")
#0.475
miss.rate(test2$wrong.race, "Other")
#0.997

#false positive - i.e.  classifying a non-white as white
false.pos.rate(test2$predicted.white, "White")
#0.287
false.pos.rate(test2$predicted.black, "Black")
#0.025
false.pos.rate(test2$predicted.latino, "Hispanic")
#0.037
false.pos.rate(test2$predicted.asian, "Asian")
#0.009
false.pos.rate(test2$predicted.other, "Other")
#00000

#TABLE A.1 - model with party, sex, surname, and tract
#overall error rate
miss.rate(test2$wrong.race.party.sex)
#0.149

#false negative
miss.rate(test2$wrong.race.party.sex, "White")
#0.067
miss.rate(test2$wrong.race.party.sex, "Black")
#0.335
miss.rate(test2$wrong.race.party.sex, "Hispanic")
#0.152
miss.rate(test2$wrong.race.party.sex, "Asian")
#0.478
miss.rate(test2$wrong.race.party.sex, "Other")
#0.995

#false positive
false.pos.rate(test2$predicted.white.party.sex, "White")
#0.245
false.pos.rate(test2$predicted.black.party.sex, "Black")
#0.029
false.pos.rate(test2$predicted.latino.party.sex, "Hispanic")
#0.036
false.pos.rate(test2$predicted.asian.party.sex, "Asian")
#0.008
false.pos.rate(test2$predicted.other.party.sex, "Other")
#0.001


#TABLE A.1 - model with party, sex, age, and tract
#overall error rate
miss.rate(test2$wrong.race.party.sex.age)
#0.151

#false negative
miss.rate(test2$wrong.race.party.sex.age, "White")
#0.061
miss.rate(test2$wrong.race.party.sex.age, "Black")
#0.356
miss.rate(test2$wrong.race.party.sex.age, "Hispanic")
#0.164
miss.rate(test2$wrong.race.party.sex.age, "Asian")
#0.529
miss.rate(test2$wrong.race.party.sex.age, "Other")
#0.992

#false positive
false.pos.rate(test2$predicted.white.party.sex.age, "White")
#0.261
false.pos.rate(test2$predicted.black.party.sex.age, "Black")
#0.025
false.pos.rate(test2$predicted.latino.party.sex.age, "Hispanic")
#0.035
false.pos.rate(test2$predicted.asian.party.sex.age, "Asian")
#0.008
false.pos.rate(test2$predicted.other.party.sex.age, "Other")
#0.001


#For FIGURE A.7 - generate a predicted race using a probability sample rather than the top probability
# Column order of `probs` and of `race.labels` must stay aligned:
# pred.whi, pred.bla, pred.lat, pred.asi, pred.oth.
race.labels <- c("White", "Black", "Hispanic", "Asian", "Other")
probs <- cbind(test2$pred.whi, test2$pred.bla, test2$pred.lat, test2$pred.asi, test2$pred.oth)

# Rows are kept when the first probability (pred.whi) is present.
# drop = FALSE keeps `probs` a matrix even if a single row survives, so the
# row-wise apply() below still works.
has.probs <- !is.na(test2$pred.whi)
probs <- probs[has.probs, , drop = FALSE]

# One draw per row: pick a label index with that row's probabilities.
set.seed(8675309)
prob.fun <- function(p) sample(x = seq_along(race.labels), size = 1, prob = p)
aa <- apply(X = probs, MARGIN = 1, FUN = prob.fun)

# Map the drawn index straight to its label (rows without probabilities stay
# NA), then flag a draw as wrong when it differs from the self-reported race.
test2$pred.race.prob <- NA_character_
test2$pred.race.prob[has.probs] <- race.labels[aa]
test2$wrong.race.prob <- ifelse(test2$pred.race.prob != test2$sr.race, 1, 0)


####################################################
####################################################
#ERRORS AND SOCIO-ECONOMIC FACTORS
####################################################
####################################################
# Per-group subsets by self-reported race, used by the figures below.
# Note: test3 is re-bound here; from this point on it is the Black subset,
# no longer the prediction-filtered frame used for the F1 score.
test3 <- test2[test2[["sr.race"]] == "Black", ]
test4 <- test2[test2[["sr.race"]] == "Hispanic", ]
test5 <- test2[test2[["sr.race"]] == "Asian", ]
test6 <- test2[test2[["sr.race"]] == "White", ]


#FIGURE A.1 - overall wrong prediction and income
# Mean of wrong.race and number of rows per income_rounded value over all of
# test2; the loess fit is weighted by those row counts.
wrong.race.all <- tapply(X = test2$wrong.race, INDEX = test2$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test2$wrong.race, INDEX = test2$income_rounded, FUN = length)
m <- loess(wrong.race.all ~ names(wrong.race.all), weights = point.size, span = 0.6)

#7x9 size
plot(
  names(wrong.race.all), wrong.race.all,
  main = "All Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m$fitted, even if
# loess dropped an income group whose mean was NaN.
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()


################################################
#FIGURE 1 - black wrong prediction and income

# Mean of wrong.race and number of rows per income_rounded value within
# test3 (Black subset); the loess fit is weighted by those row counts.
black.wrong.race <- tapply(X = test3$wrong.race, INDEX = test3$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test3$wrong.race, INDEX = test3$income_rounded, FUN = length)
m <- loess(black.wrong.race ~ names(black.wrong.race), weights = point.size, span = 0.6)

#7x9 size
# (The original call ended with a stray trailing comma, i.e. an empty
# argument; it is removed here.)
plot(
  names(black.wrong.race), black.wrong.race,
  main = "Black Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2, cex.axis = 1)
# Take x from the fitted model so it always pairs with m$fitted.
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#FIGURE 1 - Latino wrong prediction and income
# Per income_rounded value within test4 (Hispanic subset): mean of wrong.race
# and row count; the loess fit is weighted by the row counts.
latino.wrong.race <- tapply(X = test4$wrong.race, INDEX = test4$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test4$wrong.race, INDEX = test4$income_rounded, FUN = length)
m <- loess(latino.wrong.race ~ names(latino.wrong.race), weights = point.size, span = 0.6)

plot(
  names(latino.wrong.race), latino.wrong.race,
  main = "Hispanic Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#FIGURE 1 - Asian wrong prediction and income
# Per income_rounded value within test5 (Asian subset): mean of wrong.race
# and row count; the loess fit is weighted by the row counts.
asian.wrong.race <- tapply(X = test5$wrong.race, INDEX = test5$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test5$wrong.race, INDEX = test5$income_rounded, FUN = length)
m <- loess(asian.wrong.race ~ names(asian.wrong.race), weights = point.size, span = 0.6)

plot(
  names(asian.wrong.race), asian.wrong.race,
  main = "Asian Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#FIGURE 1 - White wrong prediction and income
# Per income_rounded value within test6 (White subset): mean of wrong.race
# and row count; the loess fit is weighted by the row counts.
white.wrong.race <- tapply(X = test6$wrong.race, INDEX = test6$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test6$wrong.race, INDEX = test6$income_rounded, FUN = length)
m <- loess(white.wrong.race ~ names(white.wrong.race), weights = point.size, span = 0.6)

plot(
  names(white.wrong.race), white.wrong.race,
  main = "White Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#######################################################
#FIGURE A.3 - plot false positives (Black | ~Black)
# Among rows with sr_black == 0: mean of predicted.black and row count per
# income_rounded value; the loess fit is weighted by the row counts.
black.given.not.black <- tapply(
  X = test2$predicted.black[test2$sr_black == 0],
  INDEX = test2$income_rounded[test2$sr_black == 0],
  FUN = mean, na.rm = TRUE
)
point.size2 <- tapply(
  X = test2$predicted.black[test2$sr_black == 0],
  INDEX = test2$income_rounded[test2$sr_black == 0],
  FUN = length
)
m2 <- loess(black.given.not.black ~ names(black.given.not.black), weights = point.size2, span = 0.6)

plot(
  names(black.given.not.black), black.given.not.black,
  main = "Non-Black Voters Classified as Black",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size2) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m2$fitted.
lines(x = m2$x, y = m2$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.4 - model with surname, tract, party, sex, age
# Per income_rounded value within test3 (Black subset): mean of
# wrong.race.party.sex.age and row count; loess fit weighted by row counts.
black.wrong.race.psa <- tapply(X = test3$wrong.race.party.sex.age, INDEX = test3$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test3$wrong.race.party.sex.age, INDEX = test3$income_rounded, FUN = length)
m <- loess(black.wrong.race.psa ~ names(black.wrong.race.psa), weights = point.size, span = 0.6)

#7x9 size
plot(
  names(black.wrong.race.psa), black.wrong.race.psa,
  main = "Black Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m$fitted.
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.7 (second panel, top row) - average probability rather than mean of binary classification
# Per income_rounded value within test3 (Black subset): mean of pred.bla and
# row count; the loess fit is weighted by the row counts.
black.wrong.race.pr <- tapply(X = test3$pred.bla, INDEX = test3$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test3$pred.bla, INDEX = test3$income_rounded, FUN = length)
m <- loess(black.wrong.race.pr ~ names(black.wrong.race.pr), weights = point.size, span = 0.6)

plot(
  names(black.wrong.race.pr), black.wrong.race.pr,
  main = "Black Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Average Probability of Being Black",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
# NOTE(review): `at` is reversed while `labels` is not, so the tick labelled 0
# is drawn at y = 1 and the tick labelled 1 at y = 0 (the White panel of this
# figure does not do this). Kept exactly as in the original; confirm intent.
axis(side = 2, at = rev(seq(0, 1, 0.1)), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()


#FIGURE A.7 (second panel, bottom row) probability draw to determine race, rather than highest probability
# Per income_rounded value within test3 (Black subset): mean of
# wrong.race.prob and row count; loess fit weighted by the row counts.
black.wrong.race.prob <- tapply(X = test3$wrong.race.prob, INDEX = test3$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test3$wrong.race.prob, INDEX = test3$income_rounded, FUN = length)
m <- loess(black.wrong.race.prob ~ names(black.wrong.race.prob), weights = point.size, span = 0.6)

plot(
  names(black.wrong.race.prob), black.wrong.race.prob,
  main = "Black Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()


############################################
#FIGURE A.3 - plot false positives (Latino | ~Latino)
# Among rows with sr_latino == 0: mean of predicted.latino and row count per
# income_rounded value; the loess fit is weighted by the row counts.
latino.given.not.latino <- tapply(
  X = test2$predicted.latino[test2$sr_latino == 0],
  INDEX = test2$income_rounded[test2$sr_latino == 0],
  FUN = mean, na.rm = TRUE
)
point.size2 <- tapply(
  X = test2$predicted.latino[test2$sr_latino == 0],
  INDEX = test2$income_rounded[test2$sr_latino == 0],
  FUN = length
)
m2 <- loess(latino.given.not.latino ~ names(latino.given.not.latino), weights = point.size2, span = 0.6)

plot(
  names(latino.given.not.latino), latino.given.not.latino,
  main = "Non-Hispanic Voters Classified as Hispanic",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size2) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m2$fitted.
lines(x = m2$x, y = m2$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.4 - model with gender, party, age
# Per income_rounded value within test4 (Hispanic subset): mean of
# wrong.race.party.sex.age and row count; loess fit weighted by row counts.
latino.wrong.race.psa <- tapply(X = test4$wrong.race.party.sex.age, INDEX = test4$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test4$wrong.race.party.sex.age, INDEX = test4$income_rounded, FUN = length)
m <- loess(latino.wrong.race.psa ~ names(latino.wrong.race.psa), weights = point.size, span = 0.6)

plot(
  names(latino.wrong.race.psa), latino.wrong.race.psa,
  main = "Hispanic Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()


#FIGURE A.7 (third panel, top row) - average probability rather than mean of binary classification
# Per income_rounded value within test4 (Hispanic subset): mean of pred.lat
# and row count; the loess fit is weighted by the row counts.
latino.wrong.race.pr <- tapply(X = test4$pred.lat, INDEX = test4$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test4$pred.lat, INDEX = test4$income_rounded, FUN = length)
m <- loess(latino.wrong.race.pr ~ names(latino.wrong.race.pr), weights = point.size, span = 0.6)

plot(
  names(latino.wrong.race.pr), latino.wrong.race.pr,
  main = "Hispanic Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Average Probability of Being Hispanic ",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
# NOTE(review): `at` is reversed while `labels` is not, so the tick labelled 0
# is drawn at y = 1 and the tick labelled 1 at y = 0 (the White panel of this
# figure does not do this). Kept exactly as in the original; confirm intent.
axis(side = 2, at = rev(seq(0, 1, 0.1)), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()


##FIGURE A.7 (third panel, bottom row) - probability draw to determine race
# Per income_rounded value within test4 (Hispanic subset): mean of
# wrong.race.prob and row count; loess fit weighted by the row counts.
latino.wrong.race.prob <- tapply(X = test4$wrong.race.prob, INDEX = test4$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test4$wrong.race.prob, INDEX = test4$income_rounded, FUN = length)
m <- loess(latino.wrong.race.prob ~ names(latino.wrong.race.prob), weights = point.size, span = 0.6)

plot(
  names(latino.wrong.race.prob), latino.wrong.race.prob,
  main = "Hispanic Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

############################################
#FIGURE A.3 - plot false positives (Asian | ~Asian)
# Among rows with sr_asian == 0: mean of predicted.asian and row count per
# income_rounded value; the loess fit is weighted by the row counts.
asian.given.not.asian <- tapply(
  X = test2$predicted.asian[test2$sr_asian == 0],
  INDEX = test2$income_rounded[test2$sr_asian == 0],
  FUN = mean, na.rm = TRUE
)
point.size2 <- tapply(
  X = test2$predicted.asian[test2$sr_asian == 0],
  INDEX = test2$income_rounded[test2$sr_asian == 0],
  FUN = length
)
m2 <- loess(asian.given.not.asian ~ names(asian.given.not.asian), weights = point.size2, span = 0.6)

plot(
  names(asian.given.not.asian), asian.given.not.asian,
  main = "Non-Asian Voters Classified as Asian",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size2) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m2$fitted.
lines(x = m2$x, y = m2$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.4 - model with party, sex, and age
# Per income_rounded value within test5 (Asian subset): mean of
# wrong.race.party.sex.age and row count; loess fit weighted by row counts.
asian.wrong.race.psa <- tapply(X = test5$wrong.race.party.sex.age, INDEX = test5$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test5$wrong.race.party.sex.age, INDEX = test5$income_rounded, FUN = length)
m <- loess(asian.wrong.race.psa ~ names(asian.wrong.race.psa), weights = point.size, span = 0.6)

plot(
  names(asian.wrong.race.psa), asian.wrong.race.psa,
  main = "Asian Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.7 (fourth panel, top row) - average probability rather than mean of binary classification
# Per income_rounded value within test5 (Asian subset): mean of pred.asi and
# row count; the loess fit is weighted by the row counts.
asian.wrong.race.pr <- tapply(X = test5$pred.asi, INDEX = test5$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test5$pred.asi, INDEX = test5$income_rounded, FUN = length)
m <- loess(asian.wrong.race.pr ~ names(asian.wrong.race.pr), weights = point.size, span = 0.6)

plot(
  names(asian.wrong.race.pr), asian.wrong.race.pr,
  main = "Asian Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Average Probability of Being Asian",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
# NOTE(review): `at` is reversed while `labels` is not, so the tick labelled 0
# is drawn at y = 1 and the tick labelled 1 at y = 0 (the White panel of this
# figure does not do this). Kept exactly as in the original; confirm intent.
axis(side = 2, at = rev(seq(0, 1, 0.1)), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.7 (fourth panel, bottom row) - probability draw to determine race
# Per income_rounded value within test5 (Asian subset): mean of
# wrong.race.prob and row count; loess fit weighted by the row counts.
asian.wrong.race.prob <- tapply(X = test5$wrong.race.prob, INDEX = test5$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test5$wrong.race.prob, INDEX = test5$income_rounded, FUN = length)
m <- loess(asian.wrong.race.prob ~ names(asian.wrong.race.prob), weights = point.size, span = 0.6)

plot(
  names(asian.wrong.race.prob), asian.wrong.race.prob,
  main = "Asian Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

############################################
#FIGURE A.3 - plot false positives (White | ~White)
# Among rows with sr_white == 0: mean of predicted.white and row count per
# income_rounded value; the loess fit is weighted by the row counts.
white.given.not.white <- tapply(
  X = test2$predicted.white[test2$sr_white == 0],
  INDEX = test2$income_rounded[test2$sr_white == 0],
  FUN = mean, na.rm = TRUE
)
point.size2 <- tapply(
  X = test2$predicted.white[test2$sr_white == 0],
  INDEX = test2$income_rounded[test2$sr_white == 0],
  FUN = length
)
m2 <- loess(white.given.not.white ~ names(white.given.not.white), weights = point.size2, span = 0.6)

plot(
  names(white.given.not.white), white.given.not.white,
  main = "Non-White Voters Classified as White",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size2) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# The fit line must use this model's own x values. The original took them
# from the Asian panel's vector (names(asian.given.not.asian)), which belongs
# to a different subset.
lines(x = m2$x, y = m2$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.4 - model with party, sex, and age
# Per income_rounded value within test6 (White subset): mean of
# wrong.race.party.sex.age and row count; loess fit weighted by row counts.
white.wrong.race.psa <- tapply(X = test6$wrong.race.party.sex.age, INDEX = test6$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test6$wrong.race.party.sex.age, INDEX = test6$income_rounded, FUN = length)
m <- loess(white.wrong.race.psa ~ names(white.wrong.race.psa), weights = point.size, span = 0.6)

plot(
  names(white.wrong.race.psa), white.wrong.race.psa,
  main = "White Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.7 (first panel, top row) - average probability rather than mean of binary classification
# Per income_rounded value within test6 (White subset): mean of pred.whi and
# row count; the loess fit is weighted by the row counts.
white.wrong.race.pr <- tapply(X = test6$pred.whi, INDEX = test6$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test6$pred.whi, INDEX = test6$income_rounded, FUN = length)
m <- loess(white.wrong.race.pr ~ names(white.wrong.race.pr), weights = point.size, span = 0.6)

plot(
  names(white.wrong.race.pr), white.wrong.race.pr,
  main = "White Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Average Probability of Being White",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

#FIGURE A.7 (first panel, bottom row) - probability draw to determine race
# Per income_rounded value within test6 (White subset): mean of
# wrong.race.prob and row count; loess fit weighted by the row counts.
white.wrong.race.prob <- tapply(X = test6$wrong.race.prob, INDEX = test6$income_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test6$wrong.race.prob, INDEX = test6$income_rounded, FUN = length)
m <- loess(white.wrong.race.prob ~ names(white.wrong.race.prob), weights = point.size, span = 0.6)

plot(
  names(white.wrong.race.prob), white.wrong.race.prob,
  main = "White Voters",
  xlab = "Median Income of Census Tract ($1,000)",
  ylab = "Misclassification Rate",
  xlim = c(0, 220000), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 220000, 20000), labels = seq(0, 220, 20), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()


##########################################################
####################FIGURE A.5 - GRID ####################
##########################################################

#EDUCATIONAL ATTAINMENT 
#black wrong prediction and education
# Per college_rounded value within test3 (Black subset): mean of wrong.race
# and row count; the loess fit is weighted by the row counts.
black.educ.wrong.race <- tapply(X = test3$wrong.race, INDEX = test3$college_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test3$wrong.race, INDEX = test3$college_rounded, FUN = length)
m <- loess(black.educ.wrong.race ~ names(black.educ.wrong.race), weights = point.size, span = 0.6)

plot(
  names(black.educ.wrong.race), black.educ.wrong.race,
  main = "Proportion of Race Misclassification - Black Voters",
  xlab = "Percent of Census Tract with College Degree",
  ylab = "Proportion",
  xlim = c(0, 100), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m$fitted.
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of pct_college for the same subset; the y axis is labelled in
# thousands (tick positions are raw counts, labels are counts / 1000).
hist(
  test3$pct_college,
  main = "College Degree - Black Voters",
  xlab = "Percent of Census Tract with College Degree",
  ylab = "Frequency (in 1,000)",
  ylim = c(0, 500000),
  pch = 16, cex = 0.25, col = "#99999999",
  axes = FALSE
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 500000, 100000), labels = seq(0, 500, 100), las = 2)
box()

#Latino wrong prediction and education
# Per college_rounded value within test4 (Hispanic subset): mean of
# wrong.race and row count; the loess fit is weighted by the row counts.
latino.educ.wrong.race <- tapply(X = test4$wrong.race, INDEX = test4$college_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test4$wrong.race, INDEX = test4$college_rounded, FUN = length)
m <- loess(latino.educ.wrong.race ~ names(latino.educ.wrong.race), weights = point.size, span = 0.6)

plot(
  names(latino.educ.wrong.race), latino.educ.wrong.race,
  main = "Proportion of Race Misclassification - Hispanic Voters",
  xlab = "Percent of Census Tract with College Degree",
  ylab = "Proportion",
  xlim = c(0, 100), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m$fitted.
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of pct_college for the same subset; the y axis is labelled in
# thousands (tick positions are raw counts, labels are counts / 1000).
hist(
  test4$pct_college,
  main = "College Degree - Hispanic Voters",
  xlab = "Percent of Census Tract with College Degree",
  ylab = "Frequency (in 1,000)",
  ylim = c(0, 500000),
  pch = 16, cex = 0.25, col = "#99999999",
  axes = FALSE
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 500000, 100000), labels = seq(0, 500, 100), las = 2)
box()

#Asian wrong prediction and education
# Per college_rounded value within test5 (Asian subset): mean of wrong.race
# and row count; the loess fit is weighted by the row counts.
asian.educ.wrong.race <- tapply(X = test5$wrong.race, INDEX = test5$college_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test5$wrong.race, INDEX = test5$college_rounded, FUN = length)
m <- loess(asian.educ.wrong.race ~ names(asian.educ.wrong.race), weights = point.size, span = 0.6)

plot(
  names(asian.educ.wrong.race), asian.educ.wrong.race,
  main = "Proportion of Race Misclassification - Asian Voters",
  xlab = "Percent of Census Tract with College Degree",
  ylab = "Proportion",
  xlim = c(0, 100), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m$fitted.
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of pct_college for the same subset; the y axis is labelled in
# thousands (tick positions are raw counts, labels are counts / 1000).
hist(
  test5$pct_college,
  main = "College Degree - Asian Voters",
  xlab = "Percent of Census Tract with College Degree",
  ylab = "Frequency (in 1,000)",
  ylim = c(0, 100000),
  pch = 16, cex = 0.25, col = "#99999999",
  axes = FALSE
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 100000, 10000), labels = seq(0, 100, 10), las = 2)
box()

#White wrong prediction and education
# Per college_rounded value within test6 (White subset): mean of wrong.race
# and row count; the loess fit is weighted by the row counts.
white.educ.wrong.race <- tapply(X = test6$wrong.race, INDEX = test6$college_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test6$wrong.race, INDEX = test6$college_rounded, FUN = length)
m <- loess(white.educ.wrong.race ~ names(white.educ.wrong.race), weights = point.size, span = 0.6)

plot(
  names(white.educ.wrong.race), white.educ.wrong.race,
  main = "Proportion of Race Misclassification - White Voters",
  xlab = "Percent of Census Tract with College Degree",
  ylab = "Proportion",
  xlim = c(0, 100), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m$fitted.
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of pct_college for the same subset; the y axis is labelled in
# thousands (tick positions are raw counts, labels are counts / 1000).
hist(
  test6$pct_college,
  main = "College Degree - White Voters",
  xlab = "Percent of Census Tract with College Degree",
  ylab = "Frequency (in 1,000)",
  ylim = c(0, 2000000),
  pch = 16, cex = 0.25, col = "#99999999",
  axes = FALSE
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 2000000, 250000), labels = seq(0, 2000, 250), las = 2)
box()


## VOTE PROPENSITY ######################################
#black wrong prediction and vote propensity
# Per vote_rounded value within test3 (Black subset): mean of wrong.race and
# row count; the loess fit is weighted by the row counts.
black.vote.wrong.race <- tapply(X = test3$wrong.race, INDEX = test3$vote_rounded, FUN = mean, na.rm = TRUE)
point.size <- tapply(X = test3$wrong.race, INDEX = test3$vote_rounded, FUN = length)
m <- loess(black.vote.wrong.race ~ names(black.vote.wrong.race), weights = point.size, span = 0.6)

plot(
  names(black.vote.wrong.race), black.vote.wrong.race,
  main = "Proportion of Race Misclassification - Black Voters",
  xlab = "Predicted Probability of Voting in 2016",
  ylab = "Proportion",
  xlim = c(0, 1), ylim = c(0, 1),
  pch = 16, col = "#99999999", cex = log(point.size) / 5,
  axes = FALSE
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# Take x from the fitted model so it always pairs with m$fitted.
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of pred.vote for the same subset; the y axis is labelled in
# thousands (tick positions are raw counts, labels are counts / 1000).
hist(
  test3$pred.vote,
  main = "Vote Propensity - Black Voters",
  xlab = "Predicted Probability of Voting in 2016",
  ylab = "Frequency (in 1,000)",
  ylim = c(0, 700000),
  pch = 16, cex = 0.25, col = "#99999999",
  axes = FALSE
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1))
axis(side = 2, at = seq(0, 700000, 100000), labels = seq(0, 700, 100), las = 2)
box()

# Latino/Hispanic voters: misclassification vs. vote propensity.
# (test4 is built outside this view; presumably the Hispanic subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of vote_rounded.
latino.vote.wrong.race <- tapply(
  X = test4$wrong.race,
  INDEX = test4$vote_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per vote_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test4$wrong.race,
  INDEX = test4$vote_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  latino.vote.wrong.race ~ names(latino.vote.wrong.race),
  weights = point.size,
  span = 0.6
)

plot(
  names(latino.vote.wrong.race), latino.vote.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 1), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - Hispanic Voters",
  xlab = "Predicted Probability of Voting in 2016",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(latino.vote.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test4$pred.vote; y axis labelled in thousands.
hist(
  test4$pred.vote,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 1200000), axes = FALSE,
  main = "Vote Propensity - Hispanic Voters",
  xlab = "Predicted Probability of Voting in 2016",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1))
axis(side = 2, at = seq(0, 1200000, 100000), labels = seq(0, 1200, 100), las = 2)
box()

# Asian voters: misclassification vs. vote propensity.
# (test5 is built outside this view; presumably the Asian subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of vote_rounded.
asian.vote.wrong.race <- tapply(
  X = test5$wrong.race,
  INDEX = test5$vote_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per vote_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test5$wrong.race,
  INDEX = test5$vote_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  asian.vote.wrong.race ~ names(asian.vote.wrong.race),
  weights = point.size,
  span = 0.6
)

plot(
  names(asian.vote.wrong.race), asian.vote.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 1), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - Asian Voters",
  xlab = "Predicted Probability of Voting in 2016",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(asian.vote.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test5$pred.vote; y axis labelled in thousands.
hist(
  test5$pred.vote,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 150000), axes = FALSE,
  main = "Vote Propensity - Asian Voters",
  xlab = "Predicted Probability of Voting in 2016",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1))
axis(side = 2, at = seq(0, 150000, 10000), labels = seq(0, 150, 10), las = 2)
box()

# White voters: misclassification vs. vote propensity.
# (test6 is built outside this view; presumably the White subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of vote_rounded.
white.vote.wrong.race <- tapply(
  X = test6$wrong.race,
  INDEX = test6$vote_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per vote_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test6$wrong.race,
  INDEX = test6$vote_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  white.vote.wrong.race ~ names(white.vote.wrong.race),
  weights = point.size,
  span = 0.6
)

plot(
  names(white.vote.wrong.race), white.vote.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 1), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - White Voters",
  xlab = "Predicted Probability of Voting in 2016",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(white.vote.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test6$pred.vote; y axis labelled in thousands.
hist(
  test6$pred.vote,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 3000000), axes = FALSE,
  main = "Vote Propensity - White Voters",
  xlab = "Predicted Probability of Voting in 2016",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1))
axis(side = 2, at = seq(0, 3000000, 250000), labels = seq(0, 3000, 250), las = 2)
box()


###DONATIONS########################

# Black voters: misclassification vs. campaign donations.
# (test3 is built outside this view; presumably the Black subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of log_donations_rounded.
black.donations.wrong.race <- tapply(
  X = test3$wrong.race,
  INDEX = test3$log_donations_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per log_donations_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test3$wrong.race,
  INDEX = test3$log_donations_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  black.donations.wrong.race ~ names(black.donations.wrong.race),
  span = 0.6,
  weights = point.size
)

plot(
  names(black.donations.wrong.race), black.donations.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 5), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - Black Voters",
  xlab = "Campaign Contributions of Zip Code (logged $/capita)",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 5, 1), labels = seq(0, 5, 1))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(black.donations.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test3$log_donations; y axis labelled in thousands.
hist(
  test3$log_donations,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 500000), axes = FALSE,
  main = "Campaign Donations - Black Voters",
  xlab = "Campaign Contributions of Zip Code (logged $/capita)",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 5, 1), labels = seq(0, 5, 1))
axis(side = 2, at = seq(0, 500000, 100000), labels = seq(0, 500, 100), las = 2)
box()

# Latino/Hispanic voters: misclassification vs. campaign donations.
# (test4 is built outside this view; presumably the Hispanic subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of log_donations_rounded.
latino.donations.wrong.race <- tapply(
  X = test4$wrong.race,
  INDEX = test4$log_donations_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per log_donations_rounded value; used here only as loess weights.
point.size <- tapply(
  X = test4$wrong.race,
  INDEX = test4$log_donations_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  latino.donations.wrong.race ~ names(latino.donations.wrong.race),
  span = 0.6,
  weights = point.size
)

# NOTE(review): points use a fixed cex = 1 here, whereas most sibling panels
# scale them by log(point.size) / 5 - confirm this difference is intended.
plot(
  names(latino.donations.wrong.race), latino.donations.wrong.race,
  pch = 16, cex = 1, col = "#99999999",
  xlim = c(0, 5), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - Latino Voters",
  xlab = "Campaign Contributions of Zip Code (logged $/capita)",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 5, 1), labels = seq(0, 5, 1))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(latino.donations.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test4$log_donations; y axis labelled in thousands.
hist(
  test4$log_donations,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 600000), axes = FALSE,
  main = "Campaign Donations - Latino Voters",
  xlab = "Campaign Contributions of Zip Code (logged $/capita)",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 5, 1), labels = seq(0, 5, 1))
axis(side = 2, at = seq(0, 600000, 100000), labels = seq(0, 600, 100), las = 2)
box()

# Asian voters: misclassification vs. campaign donations.
# (test5 is built outside this view; presumably the Asian subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of log_donations_rounded.
asian.donations.wrong.race <- tapply(
  X = test5$wrong.race,
  INDEX = test5$log_donations_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per log_donations_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test5$wrong.race,
  INDEX = test5$log_donations_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  asian.donations.wrong.race ~ names(asian.donations.wrong.race),
  span = 0.6,
  weights = point.size
)

plot(
  names(asian.donations.wrong.race), asian.donations.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 5), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - Asian Voters",
  xlab = "Campaign Contributions of Zip Code (logged $/capita)",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 5, 1), labels = seq(0, 5, 1))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(asian.donations.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test5$log_donations; y axis labelled in thousands.
hist(
  test5$log_donations,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 200000), axes = FALSE,
  main = "Campaign Donations - Asian Voters",
  xlab = "Campaign Contributions of Zip Code (logged $/capita)",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 5, 1), labels = seq(0, 5, 1))
axis(side = 2, at = seq(0, 200000, 50000), labels = seq(0, 200, 50), las = 2)
box()

# White voters: misclassification vs. campaign donations.
# (test6 is built outside this view; presumably the White subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of log_donations_rounded.
white.donations.wrong.race <- tapply(
  X = test6$wrong.race,
  INDEX = test6$log_donations_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per log_donations_rounded value; used here only as loess weights.
point.size <- tapply(
  X = test6$wrong.race,
  INDEX = test6$log_donations_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  white.donations.wrong.race ~ names(white.donations.wrong.race),
  span = 0.6,
  weights = point.size
)

# NOTE(review): points use a fixed cex = 1 here, whereas most sibling panels
# scale them by log(point.size) / 5 - confirm this difference is intended.
plot(
  names(white.donations.wrong.race), white.donations.wrong.race,
  pch = 16, cex = 1, col = "#99999999",
  xlim = c(0, 5), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - White Voters",
  xlab = "Campaign Contributions of Zip Code (logged $/capita)",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 5, 1), labels = seq(0, 5, 1))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(white.donations.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test6$log_donations; y axis labelled in thousands.
hist(
  test6$log_donations,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 1800000), axes = FALSE,
  main = "Campaign Donations - White Voters",
  xlab = "Campaign Contributions of Zip Code (logged $/capita)",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 5, 1), labels = seq(0, 5, 1))
axis(side = 2, at = seq(0, 1800000, 200000), labels = seq(0, 1800, 200), las = 2)
box()

######### HOME OWNERSHIP ########################### 
# Black voters: misclassification vs. tract home ownership.
# (test3 is built outside this view; presumably the Black subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of homeowner_rounded.
black.home.wrong.race <- tapply(
  X = test3$wrong.race,
  INDEX = test3$homeowner_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per homeowner_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test3$wrong.race,
  INDEX = test3$homeowner_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  black.home.wrong.race ~ names(black.home.wrong.race),
  span = 0.6,
  weights = point.size
)

plot(
  names(black.home.wrong.race), black.home.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 100), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - Black Voters",
  xlab = "Percent of Census Tract who are Homeowners",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(black.home.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test3$pct_homeowner; y axis labelled in thousands.
hist(
  test3$pct_homeowner,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 500000), axes = FALSE,
  main = "Homeownership - Black Voters",
  xlab = "Percent of Census Tract who are Homeowners",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 500000, 100000), labels = seq(0, 500, 100), las = 2)
box()

# Latino/Hispanic voters: misclassification vs. tract home ownership.
# (test4 is built outside this view; presumably the Hispanic subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of homeowner_rounded.
latino.home.wrong.race <- tapply(
  X = test4$wrong.race,
  INDEX = test4$homeowner_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per homeowner_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test4$wrong.race,
  INDEX = test4$homeowner_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  latino.home.wrong.race ~ names(latino.home.wrong.race),
  span = 0.6,
  weights = point.size
)

plot(
  names(latino.home.wrong.race), latino.home.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 100), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - Hispanic Voters",
  xlab = "Percent of Census Tract who are Homeowners",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(latino.home.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test4$pct_homeowner; y axis labelled in thousands.
hist(
  test4$pct_homeowner,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 500000), axes = FALSE,
  main = "Homeownership - Latino Voters",
  xlab = "Percent of Census Tract who are Homeowners",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 500000, 100000), labels = seq(0, 500, 100), las = 2)
box()

# Asian voters: misclassification vs. tract home ownership.
# (test5 is built outside this view; presumably the Asian subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of homeowner_rounded.
asian.home.wrong.race <- tapply(
  X = test5$wrong.race,
  INDEX = test5$homeowner_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per homeowner_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test5$wrong.race,
  INDEX = test5$homeowner_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  asian.home.wrong.race ~ names(asian.home.wrong.race),
  span = 0.6,
  weights = point.size
)

plot(
  names(asian.home.wrong.race), asian.home.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 100), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - Asian Voters",
  xlab = "Percent of Census Tract who are Homeowners",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(asian.home.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test5$pct_homeowner; y axis labelled in thousands.
hist(
  test5$pct_homeowner,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 100000), axes = FALSE,
  main = "Homeownership - Asian Voters",
  xlab = "Percent of Census Tract who are Homeowners",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 100000, 10000), labels = seq(0, 100, 10), las = 2)
box()

# White voters: misclassification vs. tract home ownership.
# (test6 is built outside this view; presumably the White subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of homeowner_rounded.
white.home.wrong.race <- tapply(
  X = test6$wrong.race,
  INDEX = test6$homeowner_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per homeowner_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test6$wrong.race,
  INDEX = test6$homeowner_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  white.home.wrong.race ~ names(white.home.wrong.race),
  span = 0.6,
  weights = point.size
)

plot(
  names(white.home.wrong.race), white.home.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 100), ylim = c(0, 1), axes = FALSE,
  main = "Proportion of Race Misclassification - White Voters",
  xlab = "Percent of Census Tract who are Homeowners",
  ylab = "Proportion"
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(white.home.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# Histogram of test6$pct_homeowner; y axis labelled in thousands.
hist(
  test6$pct_homeowner,
  pch = 16, cex = 0.25, col = "#99999999",
  ylim = c(0, 2000000), axes = FALSE,
  main = "Homeownership - White Voters",
  xlab = "Percent of Census Tract who are Homeowners",
  ylab = "Frequency (in 1,000)"
)
axis(side = 1, at = seq(0, 100, 10), labels = seq(0, 100, 10))
axis(side = 2, at = seq(0, 2000000, 250000), labels = seq(0, 2000, 250), las = 2)
box()



#######################################
#FIGURE A.6 (top row) - TRACT DIVERSITY
#######################################

# FIGURE A.6 (second panel, top row): Black voters - misclassification rate
# vs. share of the census tract that is Black.
# (test3 is built outside this view; presumably the Black subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of
# pct.black.tract_rounded.
black.diversity.wrong.race <- tapply(
  X = test3$wrong.race,
  INDEX = test3$pct.black.tract_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per pct.black.tract_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test3$wrong.race,
  INDEX = test3$pct.black.tract_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  black.diversity.wrong.race ~ names(black.diversity.wrong.race),
  span = 0.6,
  weights = point.size
)

# The x data are plotted on a 0-1 range; the axis ticks are relabelled 0-100.
plot(
  names(black.diversity.wrong.race), black.diversity.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 1), ylim = c(0, 1), axes = FALSE,
  main = "Black Voters",
  xlab = "Percent of Census Tract who are Black",
  ylab = "Misclassification Rate"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 100, 10), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
lines(x = names(black.diversity.wrong.race), y = m$fitted, lwd = 4, col = "dark red")
box()

# FIGURE A.6 (third panel, top row): Latino/Hispanic voters - misclassification
# rate vs. share of the census tract that is Latino.
# (test4 is built outside this view; presumably the Hispanic subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of
# pct.latino.tract_rounded.
latino.diversity.wrong.race <- tapply(
  X = test4$wrong.race,
  INDEX = test4$pct.latino.tract_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per pct.latino.tract_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test4$wrong.race,
  INDEX = test4$pct.latino.tract_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  latino.diversity.wrong.race ~ names(latino.diversity.wrong.race),
  span = 0.6,
  weights = point.size
)

# The x data are plotted on a 0-1 range; the axis ticks are relabelled 0-100.
plot(
  names(latino.diversity.wrong.race), latino.diversity.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 1), ylim = c(0, 1), axes = FALSE,
  main = "Hispanic Voters",
  xlab = "Percent of Census Tract who are Latino",
  ylab = "Misclassification Rate"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 100, 10), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# The curve is drawn against the predictor values stored on the fit (m$x).
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

# FIGURE A.6 (fourth panel, top row): Asian voters - misclassification rate
# vs. share of the census tract that is Asian.
# (test5 is built outside this view; presumably the Asian subset - confirm.)
# The variable name "diverstiy" (sic) is kept unchanged because code outside
# this view may refer to it.
# Mean of wrong.race (NAs dropped) within each value of
# pct.asian.tract_rounded.
asian.diverstiy.wrong.race <- tapply(
  X = test5$wrong.race,
  INDEX = test5$pct.asian.tract_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per pct.asian.tract_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test5$wrong.race,
  INDEX = test5$pct.asian.tract_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  asian.diverstiy.wrong.race ~ names(asian.diverstiy.wrong.race),
  span = 0.6,
  weights = point.size
)

# The x data are plotted on a 0-1 range; the axis ticks are relabelled 0-100.
plot(
  names(asian.diverstiy.wrong.race), asian.diverstiy.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 1), ylim = c(0, 1), axes = FALSE,
  main = "Asian Voters",
  xlab = "Percent of Census Tract who are Asian",
  ylab = "Misclassification Rate"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 100, 10), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# The curve is drawn against the predictor values stored on the fit (m$x).
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

# FIGURE A.6 (first panel, top row): White voters - misclassification rate
# vs. share of the census tract that is White.
# (test6 is built outside this view; presumably the White subset - confirm.)
# Mean of wrong.race (NAs dropped) within each value of
# pct.white.tract_rounded.
white.diversity.wrong.race <- tapply(
  X = test6$wrong.race,
  INDEX = test6$pct.white.tract_rounded,
  FUN = function(x) mean(x, na.rm = TRUE)
)
# Rows per pct.white.tract_rounded value: loess weights and point scaling.
point.size <- tapply(
  X = test6$wrong.race,
  INDEX = test6$pct.white.tract_rounded,
  FUN = length
)
# Weighted loess of the per-bin rate on the bin labels, span 0.6.
m <- loess(
  white.diversity.wrong.race ~ names(white.diversity.wrong.race),
  span = 0.6,
  weights = point.size
)

# The x data are plotted on a 0-1 range; the axis ticks are relabelled 0-100.
plot(
  names(white.diversity.wrong.race), white.diversity.wrong.race,
  pch = 16, cex = log(point.size) / 5, col = "#99999999",
  xlim = c(0, 1), ylim = c(0, 1), axes = FALSE,
  main = "White Voters",
  xlab = "Percent of Census Tract who are White",
  ylab = "Misclassification Rate"
)
axis(side = 1, at = seq(0, 1, 0.1), labels = seq(0, 100, 10), cex.axis = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), labels = seq(0, 1, 0.1), las = 2)
# The curve is drawn against the predictor values stored on the fit (m$x).
lines(x = m$x, y = m$fitted, lwd = 4, col = "dark red")
box()

################################################
## FIGURE A.10 - GENDER ########################
################################################

# Mean of wrong.race (NAs dropped) by value of test2$sex, computed separately
# for each self-reported race (sr.race) in test2.
gender.wrong.race.white <- tapply(
  X = test2$wrong.race[test2$sr.race == "White"],
  INDEX = test2$sex[test2$sr.race == "White"],
  FUN = function(x) mean(x, na.rm = TRUE)
)

gender.wrong.race.black <- tapply(
  X = test2$wrong.race[test2$sr.race == "Black"],
  INDEX = test2$sex[test2$sr.race == "Black"],
  FUN = function(x) mean(x, na.rm = TRUE)
)

gender.wrong.race.latino <- tapply(
  X = test2$wrong.race[test2$sr.race == "Hispanic"],
  INDEX = test2$sex[test2$sr.race == "Hispanic"],
  FUN = function(x) mean(x, na.rm = TRUE)
)

gender.wrong.race.asian <- tapply(
  X = test2$wrong.race[test2$sr.race == "Asian"],
  INDEX = test2$sex[test2$sr.race == "Asian"],
  FUN = function(x) mean(x, na.rm = TRUE)
)


# Empty canvas (cex = 0 hides the single dummy point); bars are drawn by hand.
plot(
  0, 0,
  cex = 0, axes = FALSE,
  xlim = c(0.75, 7.75), ylim = c(0, 0.65),
  xlab = "", ylab = "Proportion",
  main = "Race Misclassification by Self Reported Race and Gender"
)
# One pair of thick vertical segments ("bars") per race. The [2:1] index puts
# the second sex level at the left position, which is labelled "F" below.
# NOTE(review): this assumes the second level of test2$sex is female and the
# first is male (e.g. a 0 = male / 1 = female coding) - confirm against the
# data, otherwise the F/M tick labels are swapped.
segments(
  x0 = c(1, 1.5), x1 = c(1, 1.5),
  y0 = c(0, 0), y1 = gender.wrong.race.white[2:1],
  lwd = 20, lend = 1
)
segments(
  x0 = c(3, 3.5), x1 = c(3, 3.5),
  y0 = c(0, 0), y1 = gender.wrong.race.black[2:1],
  lwd = 20, lend = 1
)
segments(
  x0 = c(5, 5.5), x1 = c(5, 5.5),
  y0 = c(0, 0), y1 = gender.wrong.race.latino[2:1],
  lwd = 20, lend = 1
)
segments(
  x0 = c(7, 7.5), x1 = c(7, 7.5),
  y0 = c(0, 0), y1 = gender.wrong.race.asian[2:1],
  lwd = 20, lend = 1
)
box()
axis(
  side = 1,
  at = c(1, 1.5, 3, 3.5, 5, 5.5, 7, 7.5),
  labels = c("F", "M", "F", "M", "F", "M", "F", "M")
)
axis(side = 2, at = seq(0, 0.6, 0.1), las = 2)
# Race captions along the top edge of the plotting region.
text(
  c(1.25, 3.25, 5.25, 7.25), 0.65,
  c("White", "Black", "Hispanic", "Asian")
)
# Value labels (rounded to 2 decimals) just above each bar.
text(
  c(1, 1.5), gender.wrong.race.white[2:1] + 0.03,
  round(gender.wrong.race.white[2:1], 2), cex = 0.7
)
text(
  c(3, 3.5), gender.wrong.race.black[2:1] + 0.03,
  round(gender.wrong.race.black[2:1], 2), cex = 0.7
)
text(
  c(5, 5.5), gender.wrong.race.latino[2:1] + 0.03,
  round(gender.wrong.race.latino[2:1], 2), cex = 0.7
)
text(
  c(7, 7.5), gender.wrong.race.asian[2:1] + 0.03,
  round(gender.wrong.race.asian[2:1], 2), cex = 0.7
)
abline(h = 0)
# Dashed grey separators between the race groups.
abline(v = c(2.25, 4.25, 6.25), lty = 2, col = "grey")


# Rates of interracial marriage by race and gender, entered by hand as
# proportions in the order White, Black, Hispanic, Asian (the order of the
# captions drawn below). Source:
# https://www.pewresearch.org/social-trends/2017/05/18/1-trends-and-patterns-in-intermarriage/
men <- c(12, 24, 26, 21) / 100
women <- c(10, 12, 28, 36) / 100

# Empty canvas (cex = 0 hides the dummy point); bars are drawn by hand.
plot(
  0, 1,
  cex = 0, axes = FALSE,
  xlim = c(0.75, 4.25), ylim = c(0, 0.5),
  xlab = "", ylab = "Proportion",
  main = "Rates of Interracial Marriage by Race and Gender"
)
# Women's bars sit 0.15 left of each integer position, men's 0.15 right.
segments(x0 = 0.85:3.85, x1 = 0.85:3.85, y0 = 0, y1 = women, lend = 1, lwd = 20)
segments(x0 = 1.15:4.15, x1 = 1.15:4.15, y0 = 0, y1 = men, lend = 1, lwd = 20)
axis(side = 1, at = 0.85:3.85, labels = c("F", "F", "F", "F"))
axis(side = 1, at = 1.15:4.15, labels = c("M", "M", "M", "M"))
axis(side = 2, at = seq(0, 1, 0.1), las = 2)
# Value labels just above each bar.
text(0.85:3.85, women + 0.01, women, cex = 0.6)
text(1.15:4.15, men + 0.01, men, cex = 0.6)
# Race captions along the top edge of the plotting region.
text(1:4, 0.5, c("White", "Black", "Hispanic", "Asian"))
box()
# Dashed grey separators between the race groups, then the baseline.
abline(v = c(1.5, 2.5, 3.5), lty = 2, col = "grey")
abline(h = 0)


